{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d71bce70-9dc3-448b-9f9a-8896e83b6d09",
   "metadata": {},
   "source": [
    "# Logistic Regression Classifier"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5b48fc7-4f46-4d5a-8558-cd06892aaa27",
   "metadata": {},
   "source": [
    "## 1) Installing Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc4fa295-5c62-4888-bcf8-d07d6a7afc47",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext watermark\n",
    "%watermark -v -p numpy,pandas,matplotlib,torch -conda"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9549676-2fa5-41a7-bbb9-ce03f5797c34",
   "metadata": {},
   "source": [
    "## 2) Loading the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f609024c-3eae-4ad5-8cb8-b95b403b7606",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"toydata-truncated.txt\", sep=\"\\t\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "319546d0-e9ed-4542-873e-395edc05ef2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = df[[\"x1\", \"x2\"]].values\n",
    "y_train = df[\"label\"].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71792068-9926-41bb-81c0-2a46f6e956fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2571853-0be0-48b2-9985-8a6021d01276",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a5e5ffb-1bca-4f1b-b4cf-a78be1b07753",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68bfbbf9-4fed-4111-8391-15f2b338d8b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6800df4-98f6-401e-bb6c-9964c3b6e3cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "np.bincount(y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc4663a6-e8a7-472e-b9b0-c64f546a85e9",
   "metadata": {},
   "source": [
    "## 3) Visualizing the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36a879c3-0c84-4476-a79a-f41d897c696a",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd31bb2e-5699-43d4-8874-38e9307ce853",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(\n",
    "    X_train[y_train == 0, 0],\n",
    "    X_train[y_train == 0, 1],\n",
    "    marker=\"D\",\n",
    "    markersize=10,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 0\",\n",
    ")\n",
    "\n",
    "plt.plot(\n",
    "    X_train[y_train == 1, 0],\n",
    "    X_train[y_train == 1, 1],\n",
    "    marker=\"^\",\n",
    "    markersize=13,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 1\",\n",
    ")\n",
    "\n",
    "plt.legend(loc=2)\n",
    "\n",
    "plt.xlim([-5, 5])\n",
    "plt.ylim([-5, 5])\n",
    "\n",
    "plt.xlabel(\"Feature $x_1$\", fontsize=12)\n",
    "plt.ylabel(\"Feature $x_2$\", fontsize=12)\n",
    "\n",
    "plt.grid()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56e353d0-f75d-4267-8b41-68433780d590",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = (X_train - X_train.mean(axis=0)) / X_train.std(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "295f3b29-2b8e-4280-8f25-92b4429002d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(\n",
    "    X_train[y_train == 0, 0],\n",
    "    X_train[y_train == 0, 1],\n",
    "    marker=\"D\",\n",
    "    markersize=10,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 0\",\n",
    ")\n",
    "\n",
    "plt.plot(\n",
    "    X_train[y_train == 1, 0],\n",
    "    X_train[y_train == 1, 1],\n",
    "    marker=\"^\",\n",
    "    markersize=13,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 1\",\n",
    ")\n",
    "\n",
    "plt.legend(loc=2)\n",
    "\n",
    "plt.xlim([-5, 5])\n",
    "plt.ylim([-5, 5])\n",
    "\n",
    "plt.xlabel(\"Feature $x_1$\", fontsize=12)\n",
    "plt.ylabel(\"Feature $x_2$\", fontsize=12)\n",
    "\n",
    "plt.grid()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "db50db02-3696-4f86-b149-74baabeec6c4",
   "metadata": {},
   "source": [
    "## 4) Implementing the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3da86d9a-7cd5-467c-bf65-3388fe272bd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn.functional as F\n",
    "\n",
    "class LogisticRegression(torch.nn.Module):\n",
    "\n",
    "    def __init__(self, num_features, num_classes):\n",
    "        super().__init__()\n",
    "        self.linear = torch.nn.Linear(num_features, num_classes)\n",
    "\n",
    "    def forward(self, x):\n",
    "        logits = self.linear(x)\n",
    "        return logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e88b360e-c33c-4724-b46f-58323a9a7628",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.manual_seed(1)\n",
    "\n",
    "model = LogisticRegression(num_features=2, num_classes=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2923ae0-cf24-4252-bd19-cc326a39bc72",
   "metadata": {},
   "outputs": [],
   "source": [
    "x = torch.tensor([[1.1, 2.1],\n",
    "                  [1.1, 2.1],\n",
    "                  [9.1, 4.1]])\n",
    "\n",
    "with torch.no_grad():\n",
    "    logits = model(x)\n",
    "    probas = F.softmax(logits, dim=1)\n",
    "\n",
    "print(probas)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12068721-daf9-4ea1-9240-5bcc19c75ce9",
   "metadata": {},
   "source": [
    "## 5) Defining a DataLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6376d80-e7cf-43b4-96eb-bfd99709b63a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "\n",
    "class MyDataset(Dataset):\n",
    "    def __init__(self, X, y):\n",
    "\n",
    "        self.features = torch.tensor(X, dtype=torch.float32)\n",
    "        self.labels = torch.tensor(y, dtype=torch.int64)\n",
    "\n",
    "    def __getitem__(self, index):\n",
    "        x = self.features[index]\n",
    "        y = self.labels[index]\n",
    "        return x, y\n",
    "\n",
    "    def __len__(self):\n",
    "        return self.labels.shape[0]\n",
    "\n",
    "\n",
    "train_ds = MyDataset(X_train, y_train)\n",
    "\n",
    "train_loader = DataLoader(\n",
    "    dataset=train_ds,\n",
    "    batch_size=10,\n",
    "    shuffle=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b64f8926-930c-4945-950a-31083608ea7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "46bc16a0-ec59-4c54-a209-0a5e22406287",
   "metadata": {},
   "source": [
    "## 6) The training loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3dcaa2b1-4019-4128-9ff5-6a966c3abdf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn.functional as F\n",
    "\n",
    "\n",
    "torch.manual_seed(1)\n",
    "model = LogisticRegression(num_features=2, num_classes=2)\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
    "\n",
    "num_epochs = 20\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "\n",
    "    model = model.train()\n",
    "    for batch_idx, (features, class_labels) in enumerate(train_loader):\n",
    "\n",
    "\n",
    "        ###############################\n",
    "        ### Complete the training loop\n",
    "        ###\n",
    "        ### Your code below\n",
    "        ###############################\n",
    "\n",
    "        outputs = # ?????\n",
    "\n",
    "        loss = F.cross_entropy(outputs, class_labels)\n",
    "\n",
    "        # ?????\n",
    "        # ?????\n",
    "        # ?????\n",
    "\n",
    "        ################################\n",
    "        ## No changes necessary below\n",
    "        ################################\n",
    "\n",
    "        ### LOGGING\n",
    "        print(f'Epoch: {epoch+1:03d}/{num_epochs:03d}'\n",
    "               f' | Batch {batch_idx:03d}/{len(train_loader):03d}'\n",
    "               f' | Loss: {loss:.2f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb0d5821-7c8d-46b5-9e7d-02e72cac2acc",
   "metadata": {},
   "source": [
    "## 7) Evaluating the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d910ddbb-798f-47ab-8aab-e2dba4aa4005",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_accuracy(model, dataloader):\n",
    "\n",
    "    model = model.eval()\n",
    "\n",
    "    correct = 0.0\n",
    "    total_examples = 0\n",
    "\n",
    "    for idx, (features, class_labels) in enumerate(dataloader):\n",
    "\n",
    "        with torch.no_grad():\n",
    "            logits = model(features)\n",
    "\n",
    "        pred = torch.argmax(logits, dim=1)\n",
    "\n",
    "        compare = class_labels == pred\n",
    "        correct += torch.sum(compare)\n",
    "        total_examples += len(compare)\n",
    "\n",
    "    return correct / total_examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27538c8d-61bc-47b0-8289-b6aab4aa16ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_acc = compute_accuracy(model, train_loader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a4ecf35-4745-43a8-8ea8-14f71cba5b59",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Accuracy: {train_acc*100}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbcd412a-02c0-4d2c-835e-4b01368f53f8",
   "metadata": {},
   "source": [
    "## 8) Optional: visualizing the decision boundary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a76bb67c-358c-4e91-a5c7-5456333827aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(\n",
    "    X_train[y_train == 0, 0],\n",
    "    X_train[y_train == 0, 1],\n",
    "    marker=\"D\",\n",
    "    markersize=10,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 0\",\n",
    ")\n",
    "\n",
    "plt.plot(\n",
    "    X_train[y_train == 1, 0],\n",
    "    X_train[y_train == 1, 1],\n",
    "    marker=\"^\",\n",
    "    markersize=13,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 1\",\n",
    ")\n",
    "\n",
    "plt.legend(loc=2)\n",
    "\n",
    "plt.xlim([-5, 5])\n",
    "plt.ylim([-5, 5])\n",
    "\n",
    "plt.xlabel(\"Feature $x_1$\", fontsize=12)\n",
    "plt.ylabel(\"Feature $x_2$\", fontsize=12)\n",
    "\n",
    "plt.grid()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d9f1813-d232-4a7d-aebc-cfd5303fae48",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_boundary(model):\n",
    "\n",
    "    w1 = model.linear.weight[0][0].detach()\n",
    "    w2 = model.linear.weight[0][1].detach()\n",
    "    b = model.linear.bias[0].detach()\n",
    "\n",
    "    x1_min = -20\n",
    "    x2_min = (-(w1 * x1_min) - b) / w2\n",
    "\n",
    "    x1_max = 20\n",
    "    x2_max = (-(w1 * x1_max) - b) / w2\n",
    "\n",
    "    return x1_min, x1_max, x2_min, x2_max"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d71b5df-dd8d-41d5-b6fd-640d6f40d5a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "x1_min, x1_max, x2_min, x2_max = plot_boundary(model)\n",
    "\n",
    "\n",
    "plt.plot(\n",
    "    X_train[y_train == 0, 0],\n",
    "    X_train[y_train == 0, 1],\n",
    "    marker=\"D\",\n",
    "    markersize=10,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 0\",\n",
    ")\n",
    "\n",
    "plt.plot(\n",
    "    X_train[y_train == 1, 0],\n",
    "    X_train[y_train == 1, 1],\n",
    "    marker=\"^\",\n",
    "    markersize=13,\n",
    "    linestyle=\"\",\n",
    "    label=\"Class 1\",\n",
    ")\n",
    "\n",
    "plt.plot([x1_min, x1_max], [x2_min, x2_max], color=\"k\")\n",
    "\n",
    "plt.legend(loc=2)\n",
    "\n",
    "plt.xlim([-5, 5])\n",
    "plt.ylim([-5, 5])\n",
    "\n",
    "plt.xlabel(\"Feature $x_1$\", fontsize=12)\n",
    "plt.ylabel(\"Feature $x_2$\", fontsize=12)\n",
    "\n",
    "plt.grid()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
